# packages
pacman::p_load(car, sjstats, jtools, agricolae, tidyverse, dplyr, tidyr, readxl, ggplot2)

# Data: read the 'anova' worksheet (cells B11:D45) and inspect it
cancer = read_excel(
  path = "DataSets.xlsx",
  sheet = "anova",
  range = "B11:D45"
)
head(cancer)
tail(cancer)
str(cancer)

# Organ is the grouping variable in the analyses below, so store it as a factor
cancer[["Organ"]] = as.factor(cancer[["Organ"]])
str(cancer)

# define two new functions

# Coefficient of variation, in percent: sd(x) / mean(x) * 100.
#   x      numeric vector
#   na.rm  if TRUE, missing values are dropped before computing; the default
#          (FALSE) keeps the previous behaviour, where any NA gives NA
cv = function(x, na.rm = FALSE) {
  sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm) * 100
}
# Standard error of the mean: sd(x) / sqrt(n).
#   x      numeric vector
#   na.rm  if TRUE, missing values are dropped and n counts only the
#          non-missing values; the default (FALSE) keeps the previous
#          behaviour, where any NA gives NA
se = function(x, na.rm = FALSE) {
  n = if (na.rm) sum(!is.na(x)) else length(x)
  sd(x, na.rm = na.rm) / sqrt(n)
}

# Descriptive statistics of Survival within each Organ group:
# mean, standard deviation, coefficient of variation (%) and standard error
cancer %>%
  group_by(Organ) %>%
  summarise(
    Mean = mean(Survival),
    SD = sd(Survival),
    `CV(%)` = cv(Survival),
    SE = se(Survival)
  )

# One-way ANOVA: Survival explained by Organ
mod = aov(formula = Survival ~ Organ, data = cancer)
summary(mod)

# Diagnostic plots of the fitted model (panels 1 and 2 of plot())
plot(mod, which = 1)
plot(mod, which = 2)

# Residuals: quantile-quantile plot and boxplot
qqPlot(mod$residuals)
boxplot(mod$residuals)

# Per-group checks on the raw response: Levene's test, then Q-Q plots and
# boxplots of Survival split by Organ
leveneTest(Survival ~ Organ, data = cancer)
qqPlot(Survival ~ Organ, data = cancer)
boxplot(Survival ~ Organ, data = cancer)

# Response CV: coefficient of variation of Survival over all observations
cv(cancer$Survival)

# Model CV = RMSE / mean(y) * 100
# RMSE = Root Mean Squared Error = sqrt(MSE)
# MSE (residual mean square) = residual sum of squares / residual df.
# It is computed from the fitted model instead of being typed in from the
# printed ANOVA table, so it is not rounded and follows any change in the data.
MSE = sum(mod$residuals^2) / mod$df.residual
RMSE = sqrt(MSE)
Mean = mean(cancer$Survival)
model_cv = RMSE / Mean * 100
model_cv

# Eta squared: proportion of variance explained by the model
anova_stats(mod)
# Eta squared = 0.195 indicates that 19.5% of the variance in survival can be
# explained by the affected organ.

# Post hoc analysis options: Tukey HSD (Honestly Significant Difference test),
# DMRT (Duncan's Multiple Range Test), Bonferroni correction,
# LSD (Least Significant Difference test).
# Tukey HSD controls the FWER (Family-Wise Error Rate).
# Three groups in our case: the 5% level of significance is ensured jointly
# for all pairwise group comparisons (FWER).
# Unadjusted: testing the 3 pairwise comparisons at 5% each lets the overall
# chance of at least one false positive rise to roughly 15%
# (1 - 0.95^3 is about 14.3%).
# p-value = probability, assuming H0 is true, of data at least as extreme as
# those observed; a small p-value is evidence against H0.
# Type I error = rejecting a true H0 (its probability is the significance level).
# Type II error = failing to reject a false H0.
# Author's note: Type II error is regarded here as more dangerous than Type I
# error (which one matters more depends on the application).

# Eta squared = effect size = treatment SS / total SS

summary(mod)
# Read the sums of squares from the ANOVA table rather than copying them from
# the printed output: first entry = treatment (Organ), total = sum of all rows.
sum_sq = summary(mod)[[1]][["Sum Sq"]]
eta = sum_sq[1] / sum(sum_sq)
eta

# Post hoc analysis: Tukey HSD on the fitted model, comparing Organ groups
tukey = HSD.test(mod, trt = "Organ")
tukey

# Move the organ labels from the row names into a regular column so that the
# means table and the grouping-letter table can be joined on it.
a = tukey$means %>% rownames_to_column("Organ")
b = tukey$groups %>% rownames_to_column("Organ")

# Select columns by name, not by position: the code below needs exactly
# Survival, se and groups, and positional indices would silently pick other
# columns if the layout of the HSD.test result differed.
mean_cols = c("Organ", "Survival", "se")
group_cols = c("Organ", "groups")

a[mean_cols]
b[group_cols]

figdata = left_join(a[mean_cols], b[group_cols], by = "Organ")
figdata

# Lower/upper limits for the error bars: mean -/+ 1.96 * se
# (normal-approximation 95% interval)
figdata = figdata %>% mutate(
  LL = Survival - se * 1.96,
  UL = Survival + se * 1.96
)

figdata

# Bar chart of mean Survival per Organ with error bars (LL to UL) and the
# Tukey grouping letters printed 100 units above each upper limit.
fig = ggplot(figdata, aes(x = Organ, y = Survival, fill = Organ)) +
  geom_col() +
  geom_errorbar(aes(ymin = LL, ymax = UL), width = 0.2) +
  geom_text(aes(y = UL + 100, label = groups)) +
  theme_bw() +
  theme(legend.position = "top")
fig

# Pass the plot explicitly instead of relying on ggsave()'s implicit
# "last plot displayed", so the saved figure cannot change if another plot is
# created in between.
# NOTE(review): the file name is kept exactly as it was (including its typos)
# so the output path does not change; rename deliberately if desired.
ggsave('fig 1. Effect of organs affect on survisal.png',
       plot = fig,
       dpi = 1000, height = 5, width = 6, units = 'in')



